package com.dnb.webmash.playmaker.server;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.StringTokenizer;
import java.util.logging.Level;

//code from http://java.sun.com/developer/technicalArticles/ThirdParty/WebCrawler/
//by musclefish http://www.musclefish.com/
//TODO understand this, optimize it, and use it in URLtoPlaylistServiceImpl
public final class RobotSafe {
	 public static final String DISALLOW = "Disallow:";
	 public static final String USERAGENT = "User-agent:";//TODO CONTINUE HERE
	 static boolean isRobotSafe(String urlName) {
		 	URL url;
			try {
				url = new URL(urlName);
			} catch (MalformedURLException e1) {
				return false;
				}
			
			String strHost = url.getHost();

			// form MyURL of the robots.txt file
			String strRobot = "http://" + strHost + "/robots.txt";
			URL urlRobot;
			try { 
			    urlRobot = new URL(strRobot);
			} catch (MalformedURLException e) {
			    // something weird is happening, so don't trust it
			    return false;
			}

			String strCommands;
			try {
			    InputStream urlRobotStream = urlRobot.openStream();

			    // read in entire file
			    byte b[] = new byte[1000];
			    int numRead = urlRobotStream.read(b);
			    strCommands = new String(b, 0, numRead);
			    while (numRead != -1) {
				numRead = urlRobotStream.read(b);
				if (numRead != -1) {
				    String newCommands = new String(b, 0, numRead);
				    strCommands += newCommands;
				}
			    }
			    urlRobotStream.close();
			} catch (IOException e) {
			    // if there is no robots.txt file, it is OK to search
			    return true;
			}

			// search for "Disallow:" commands that apply to us
			String strURL = url.getFile();
			int index = 0;
			while ((index = strCommands.indexOf(DISALLOW, index)) != -1) {
			    index += DISALLOW.length();
			    String strPath = strCommands.substring(index);
			    StringTokenizer st = new StringTokenizer(strPath);

			    if (!st.hasMoreTokens())
				break;
			    
			    String strBadPath = st.nextToken();

			    // if the MyURL starts with a disallowed path, it is not safe
			    if (strURL.indexOf(strBadPath) == 0)
				return false;
			}

			return true;
		    }
}
